From f32bc42de344e0c9e1949fb8cc5e3ce042dae16e Mon Sep 17 00:00:00 2001 From: "kaf24@scramble.cl.cam.ac.uk" Date: Thu, 3 Feb 2005 13:07:34 +0000 Subject: [PATCH] bitkeeper revision 1.1159.212.77 (4202221693AFbvFZWeMHHIjQfbzTIQ) More x86_64 progress. Many more gaps filled in. Next step is DOM0 construction. Signed-off-by: keir.fraser@cl.cam.ac.uk --- .../drivers/xen/privcmd/privcmd.c | 12 ++- xen/arch/x86/boot/x86_64.S | 7 +- xen/arch/x86/domain.c | 28 +++--- xen/arch/x86/memory.c | 34 +------- xen/arch/x86/setup.c | 3 + xen/arch/x86/traps.c | 21 +---- xen/arch/x86/x86_32/mm.c | 52 +++++++++--- xen/arch/x86/x86_32/traps.c | 21 +++++ xen/arch/x86/x86_64/entry.S | 6 +- xen/arch/x86/x86_64/mm.c | 85 ++++++++++++++----- xen/arch/x86/x86_64/traps.c | 75 ++++++++++++++++ xen/include/asm-x86/desc.h | 5 +- xen/include/asm-x86/mm.h | 3 - xen/include/asm-x86/processor.h | 12 ++- xen/include/asm-x86/x86_64/regs.h | 8 +- xen/include/public/arch-x86_32.h | 5 +- xen/include/public/arch-x86_64.h | 49 ++++++----- 17 files changed, 277 insertions(+), 149 deletions(-) diff --git a/linux-2.6.10-xen-sparse/drivers/xen/privcmd/privcmd.c b/linux-2.6.10-xen-sparse/drivers/xen/privcmd/privcmd.c index 98e7e92ff4..c97fe7cf21 100644 --- a/linux-2.6.10-xen-sparse/drivers/xen/privcmd/privcmd.c +++ b/linux-2.6.10-xen-sparse/drivers/xen/privcmd/privcmd.c @@ -174,13 +174,11 @@ static int privcmd_ioctl(struct inode *inode, struct file *file, case IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN: { - unsigned long m2p_start_mfn = - HYPERVISOR_shared_info->arch.mfn_to_pfn_start; - - if( put_user( m2p_start_mfn, (unsigned long *) data ) ) - ret = -EFAULT; - else - ret = 0; + unsigned long m2pv = (unsigned long)machine_to_phys_mapping; + pgd_t *pgd = pgd_offset_k(m2pv); + pmd_t *pmd = pmd_offset(pgd, m2pv); + unsigned long m2p_start_mfn = pmd_val(*pmd) >> PAGE_SHIFT; + ret = put_user(m2p_start_mfn, (unsigned long *)data) ? 
-EFAULT: 0; } break; diff --git a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S index 2452973e87..fa9ea1b5c1 100644 --- a/xen/arch/x86/boot/x86_64.S +++ b/xen/arch/x86/boot/x86_64.S @@ -249,16 +249,11 @@ ENTRY(cpu0_stack) # Initial stack is 8kB ENTRY(stext) ENTRY(_stext) -.globl switch_to, do_iopl -switch_to: -do_iopl: -.globl copy_from_user, copy_to_user, copy_user_generic, new_thread +.globl copy_from_user, copy_to_user, copy_user_generic copy_from_user: copy_to_user: copy_user_generic: -new_thread: .globl __get_user_1, __get_user_4, __get_user_8 __get_user_1: __get_user_4: __get_user_8: - diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 1a5378de2f..fe7225861b 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -261,9 +261,8 @@ static void continue_nonidle_task(struct exec_domain *ed) void arch_do_createdomain(struct exec_domain *ed) { struct domain *d = ed->domain; -#ifdef ARCH_HAS_FAST_TRAP + SET_DEFAULT_FAST_TRAP(&ed->thread); -#endif if ( d->id == IDLE_DOMAIN_ID ) { @@ -276,7 +275,6 @@ void arch_do_createdomain(struct exec_domain *ed) d->shared_info = (void *)alloc_xenheap_page(); memset(d->shared_info, 0, PAGE_SIZE); ed->vcpu_info = &d->shared_info->vcpu_data[ed->eid]; - d->shared_info->arch.mfn_to_pfn_start = m2p_start_mfn; SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d); machine_to_phys_mapping[virt_to_phys(d->shared_info) >> PAGE_SHIFT] = INVALID_P2M_ENTRY; @@ -453,10 +451,8 @@ int arch_final_setup_guestos(struct exec_domain *d, full_execution_context_t *c) &c->trap_ctxt, sizeof(d->thread.traps)); -#ifdef ARCH_HAS_FAST_TRAP if ( (rc = (int)set_fast_trap(d, c->fast_trap_idx)) != 0 ) return rc; -#endif d->mm.ldt_base = c->ldt_base; d->mm.ldt_ents = c->ldt_ents; @@ -498,8 +494,6 @@ int arch_final_setup_guestos(struct exec_domain *d, full_execution_context_t *c) return 0; } -#if defined(__i386__) /* XXX */ - void new_thread(struct exec_domain *d, unsigned long start_pc, unsigned long start_stack, @@ -515,8 +509,8 
@@ void new_thread(struct exec_domain *d, * ESI = start_info * [EAX,EBX,ECX,EDX,EDI,EBP are zero] */ - ec->ds = ec->es = ec->fs = ec->gs = ec->ss = FLAT_RING1_DS; - ec->cs = FLAT_RING1_CS; + ec->ds = ec->es = ec->fs = ec->gs = ec->ss = FLAT_GUESTOS_DS; + ec->cs = FLAT_GUESTOS_CS; ec->eip = start_pc; ec->esp = start_stack; ec->esi = start_info; @@ -530,18 +524,19 @@ void new_thread(struct exec_domain *d, * This special macro can be used to load a debugging register */ #define loaddebug(thread,register) \ - __asm__("movl %0,%%db" #register \ + __asm__("mov %0,%%db" #register \ : /* no output */ \ :"r" (thread->debugreg[register])) - void switch_to(struct exec_domain *prev_p, struct exec_domain *next_p) { struct thread_struct *next = &next_p->thread; struct tss_struct *tss = init_tss + smp_processor_id(); execution_context_t *stack_ec = get_execution_context(); int i; +#ifdef CONFIG_VMX unsigned long vmx_domain = next_p->thread.arch_vmx.flags; +#endif __cli(); @@ -573,7 +568,9 @@ void switch_to(struct exec_domain *prev_p, struct exec_domain *next_p) loaddebug(next, 7); } - if (vmx_domain) { +#ifdef CONFIG_VMX + if ( vmx_domain ) + { /* Switch page tables. */ write_ptbase(&next_p->mm); @@ -583,13 +580,16 @@ void switch_to(struct exec_domain *prev_p, struct exec_domain *next_p) __sti(); return; - } + } +#endif SET_FAST_TRAP(&next_p->thread); +#ifdef __i386__ /* Switch the guest OS ring-1 stack. */ tss->esp1 = next->guestos_sp; tss->ss1 = next->guestos_ss; +#endif /* Switch page tables. */ write_ptbase(&next_p->mm); @@ -632,8 +632,6 @@ long do_iopl(domid_t domain, unsigned int new_io_pl) return 0; } -#endif - unsigned long hypercall_create_continuation( unsigned int op, unsigned int nr_args, ...) 
{ diff --git a/xen/arch/x86/memory.c b/xen/arch/x86/memory.c index d3a4e8dc9c..e855210973 100644 --- a/xen/arch/x86/memory.c +++ b/xen/arch/x86/memory.c @@ -168,31 +168,10 @@ void __init init_frametable(void) void arch_init_memory(void) { -#ifdef __i386__ /* XXX */ - unsigned long i; - - /* - * We are rather picky about the layout of 'struct pfn_info'. The - * count_info and domain fields must be adjacent, as we perform atomic - * 64-bit operations on them. Also, just for sanity, we assert the size - * of the structure here. - */ - if ( (offsetof(struct pfn_info, u.inuse.domain) != - (offsetof(struct pfn_info, count_info) + sizeof(u32))) || - (sizeof(struct pfn_info) != 24) ) - { - printk("Weird pfn_info layout (%ld,%ld,%d)\n", - offsetof(struct pfn_info, count_info), - offsetof(struct pfn_info, u.inuse.domain), - sizeof(struct pfn_info)); - for ( ; ; ) ; - } + extern void subarch_init_memory(struct domain *); memset(percpu_info, 0, sizeof(percpu_info)); - /* Initialise to a magic of 0x55555555 so easier to spot bugs later. */ - memset(machine_to_phys_mapping, 0x55, 4<<20); - /* * Initialise our DOMID_XEN domain. * Any Xen-heap pages that we will allow to be mapped will have @@ -211,16 +190,7 @@ void arch_init_memory(void) atomic_set(&dom_io->refcnt, 1); dom_io->id = DOMID_IO; - /* M2P table is mappable read-only by privileged domains. */ - for ( i = 0; i < 1024; i++ ) - { - frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1; - /* gdt to make sure it's only mapped read-only by non-privileged - domains. 
*/ - frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1; - frame_table[m2p_start_mfn+i].u.inuse.domain = dom_xen; - } -#endif + subarch_init_memory(dom_xen); } static void __invalidate_shadow_ldt(struct exec_domain *d) diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index eaad036759..bf3b90ce1e 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -459,6 +459,9 @@ static void __init start_of_day(void) #endif watchdog_on = 1; +#ifdef __x86_64__ /* x86_32 uses low mappings when building DOM0. */ + zap_low_mappings(); +#endif } void __init __start_xen(multiboot_info_t *mbi) diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 495b88f188..4f7f91bafe 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -329,26 +329,8 @@ asmlinkage int do_page_fault(struct xen_regs *regs) DEBUGGER_trap_fatal(TRAP_page_fault, regs); -#ifdef __i386__ - if ( addr >= PAGE_OFFSET ) - { - unsigned long page; - page = l2_pgentry_val(idle_pg_table[addr >> L2_PAGETABLE_SHIFT]); - printk("*pde = %p\n", page); - if ( page & _PAGE_PRESENT ) - { - page &= PAGE_MASK; - page = ((unsigned long *) __va(page))[(addr&0x3ff000)>>PAGE_SHIFT]; - printk(" *pte = %p\n", page); - } -#ifdef MEMORY_GUARD - if ( !(regs->error_code & 1) ) - printk(" -- POSSIBLY AN ACCESS TO FREED MEMORY? 
--\n"); -#endif - } -#endif /* __i386__ */ - show_registers(regs); + show_page_walk(addr); panic("CPU%d FATAL PAGE FAULT\n" "[error_code=%04x]\n" "Faulting linear address might be %p\n", @@ -749,7 +731,6 @@ void __init trap_init(void) set_intr_gate(TRAP_deferred_nmi,&nmi); #if defined(__i386__) - set_task_gate(TRAP_double_fault,__DOUBLEFAULT_TSS_ENTRY<<3); _set_gate(idt_table+HYPERCALL_VECTOR, 14, 1, &hypercall); #elif defined(__x86_64__) _set_gate(idt_table+HYPERCALL_VECTOR, 14, 3, &hypercall); diff --git a/xen/arch/x86/x86_32/mm.c b/xen/arch/x86/x86_32/mm.c index 8e1375d242..5102488874 100644 --- a/xen/arch/x86/x86_32/mm.c +++ b/xen/arch/x86/x86_32/mm.c @@ -27,8 +27,6 @@ #include #include -unsigned long m2p_start_mfn; - /* Map physical byte range (@p, @p+@s) at virt address @v in pagetable @pt. */ int map_pages( pagetable_t *pt, @@ -97,16 +95,16 @@ void __init paging_init(void) /* Allocate and map the machine-to-phys table. */ if ( (pg = alloc_domheap_pages(NULL, 10)) == NULL ) panic("Not enough memory to bootstrap Xen.\n"); - m2p_start_mfn = page_to_pfn(pg); - idle_pg_table[RDWR_MPT_VIRT_START >> L2_PAGETABLE_SHIFT] = + idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)] = mk_l2_pgentry(page_to_phys(pg) | __PAGE_HYPERVISOR | _PAGE_PSE); + memset((void *)RDWR_MPT_VIRT_START, 0x55, 4UL << 20); /* Xen 4MB mappings can all be GLOBAL. */ if ( cpu_has_pge ) { for ( v = HYPERVISOR_VIRT_START; v; v += (1 << L2_PAGETABLE_SHIFT) ) { - l2e = l2_pgentry_val(idle_pg_table[v >> L2_PAGETABLE_SHIFT]); + l2e = l2_pgentry_val(idle_pg_table[l2_table_offset(v)]); if ( l2e & _PAGE_PSE ) l2e |= _PAGE_GLOBAL; idle_pg_table[v >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(l2e); @@ -116,23 +114,22 @@ void __init paging_init(void) /* Create page table for ioremap(). 
*/ ioremap_pt = (void *)alloc_xenheap_page(); clear_page(ioremap_pt); - idle_pg_table[IOREMAP_VIRT_START >> L2_PAGETABLE_SHIFT] = + idle_pg_table[l2_table_offset(IOREMAP_VIRT_START)] = mk_l2_pgentry(__pa(ioremap_pt) | __PAGE_HYPERVISOR); /* Create read-only mapping of MPT for guest-OS use. */ - idle_pg_table[RO_MPT_VIRT_START >> L2_PAGETABLE_SHIFT] = + idle_pg_table[l2_table_offset(RO_MPT_VIRT_START)] = mk_l2_pgentry(l2_pgentry_val( - idle_pg_table[RDWR_MPT_VIRT_START >> L2_PAGETABLE_SHIFT]) & - ~_PAGE_RW); + idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)]) & ~_PAGE_RW); /* Set up mapping cache for domain pages. */ mapcache = (unsigned long *)alloc_xenheap_page(); clear_page(mapcache); - idle_pg_table[MAPCACHE_VIRT_START >> L2_PAGETABLE_SHIFT] = + idle_pg_table[l2_table_offset(MAPCACHE_VIRT_START)] = mk_l2_pgentry(__pa(mapcache) | __PAGE_HYPERVISOR); /* Set up linear page table mapping. */ - idle_pg_table[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] = + idle_pg_table[l2_table_offset(LINEAR_PT_VIRT_START)] = mk_l2_pgentry(__pa(idle_pg_table) | __PAGE_HYPERVISOR); } @@ -144,6 +141,39 @@ void __init zap_low_mappings(void) flush_tlb_all_pge(); } +void subarch_init_memory(struct domain *dom_xen) +{ + unsigned long i, m2p_start_mfn; + + /* + * We are rather picky about the layout of 'struct pfn_info'. The + * count_info and domain fields must be adjacent, as we perform atomic + * 64-bit operations on them. Also, just for sanity, we assert the size + * of the structure here. + */ + if ( (offsetof(struct pfn_info, u.inuse.domain) != + (offsetof(struct pfn_info, count_info) + sizeof(u32))) || + (sizeof(struct pfn_info) != 24) ) + { + printk("Weird pfn_info layout (%ld,%ld,%d)\n", + offsetof(struct pfn_info, count_info), + offsetof(struct pfn_info, u.inuse.domain), + sizeof(struct pfn_info)); + for ( ; ; ) ; + } + + /* M2P table is mappable read-only by privileged domains. 
*/ + m2p_start_mfn = l2_pgentry_to_pagenr( + idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)]); + for ( i = 0; i < 1024; i++ ) + { + frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1; + /* gdt to make sure it's only mapped read-only by non-privileged + domains. */ + frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1; + frame_table[m2p_start_mfn+i].u.inuse.domain = dom_xen; + } +} /* * Allows shooting down of borrowed page-table use on specific CPUs. diff --git a/xen/arch/x86/x86_32/traps.c b/xen/arch/x86/x86_32/traps.c index 2e8bc1b3a0..17bd35e234 100644 --- a/xen/arch/x86/x86_32/traps.c +++ b/xen/arch/x86/x86_32/traps.c @@ -117,6 +117,25 @@ void show_registers(struct xen_regs *regs) show_stack((unsigned long *)&regs->esp); } +void show_page_walk(unsigned long addr) +{ + unsigned long page; + + if ( addr < PAGE_OFFSET ) + return; + + printk("Pagetable walk from %p:\n", addr); + + page = l2_pgentry_val(idle_pg_table[l2_table_offset(addr)]); + printk(" L2 = %p %s\n", page, (page & _PAGE_PSE) ? 
"(4MB)" : ""); + if ( !(page & _PAGE_PRESENT) || (page & _PAGE_PSE) ) + return; + + page &= PAGE_MASK; + page = ((unsigned long *) __va(page))[l1_table_offset(addr)]; + printk(" L1 = %p\n", page); +} + #define DOUBLEFAULT_STACK_SIZE 1024 static struct tss_struct doublefault_tss; static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE]; @@ -173,6 +192,8 @@ void __init doublefault_init(void) tss->bitmap = IOBMP_INVALID_OFFSET; _set_tssldt_desc(gdt_table+__DOUBLEFAULT_TSS_ENTRY, (unsigned long)tss, 235, 9); + + set_task_gate(TRAP_double_fault, __DOUBLEFAULT_TSS_ENTRY<<3); } long set_fast_trap(struct exec_domain *p, int idx) diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S index c9722e95dd..5e3c156fe3 100644 --- a/xen/arch/x86/x86_64/entry.S +++ b/xen/arch/x86/x86_64/entry.S @@ -126,6 +126,10 @@ ENTRY(spurious_interrupt_bug) movl $TRAP_spurious_int,4(%rsp) jmp error_code +ENTRY(double_fault) + movl $TRAP_double_fault,4(%rsp) + jmp error_code + ENTRY(nmi) iret @@ -140,7 +144,7 @@ ENTRY(exception_table) .quad SYMBOL_NAME(do_bounds) .quad SYMBOL_NAME(do_invalid_op) .quad SYMBOL_NAME(math_state_restore) - .quad 0 # double fault + .quad SYMBOL_NAME(do_double_fault) .quad SYMBOL_NAME(do_coprocessor_segment_overrun) .quad SYMBOL_NAME(do_invalid_TSS) .quad SYMBOL_NAME(do_segment_not_present) diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c index 6760595580..e57208009f 100644 --- a/xen/arch/x86/x86_64/mm.c +++ b/xen/arch/x86/x86_64/mm.c @@ -27,8 +27,6 @@ #include #include -unsigned long m2p_start_mfn; /* XXX Kill this (in 32-bit code also). */ - void *safe_page_alloc(void) { extern int early_boot; @@ -117,34 +115,86 @@ void __set_fixmap( void __init paging_init(void) { void *newpt; + unsigned long i, p, max; - /* Allocate and map the machine-to-phys table. */ - /* XXX TODO XXX */ + /* Map all of physical memory. 
*/ + max = (max_page + (1UL << L2_PAGETABLE_SHIFT) - 1UL) & + ~((1UL << L2_PAGETABLE_SHIFT) - 1UL); + map_pages(idle_pg_table, PAGE_OFFSET, 0, max, PAGE_HYPERVISOR); - /* Create page table for ioremap(). */ - newpt = (void *)alloc_xenheap_page(); - clear_page(newpt); - idle_pg_table[IOREMAP_VIRT_START >> L4_PAGETABLE_SHIFT] = - mk_l4_pgentry(__pa(newpt) | __PAGE_HYPERVISOR); + /* + * Allocate and map the machine-to-phys table. + * This also ensures L3 is present for ioremap(). + */ + for ( i = 0; i < max_page; i += ((1UL << L2_PAGETABLE_SHIFT) / 8) ) + { + p = alloc_boot_pages(1UL << L2_PAGETABLE_SHIFT, + 1UL << L2_PAGETABLE_SHIFT); + if ( p == 0 ) + panic("Not enough memory for m2p table\n"); + map_pages(idle_pg_table, RDWR_MPT_VIRT_START + i*8, p, + 1UL << L2_PAGETABLE_SHIFT, PAGE_HYPERVISOR); + memset((void *)(RDWR_MPT_VIRT_START + i*8), 0x55, + 1UL << L2_PAGETABLE_SHIFT); + } /* Create read-only mapping of MPT for guest-OS use. */ newpt = (void *)alloc_xenheap_page(); clear_page(newpt); - idle_pg_table[RO_MPT_VIRT_START >> L4_PAGETABLE_SHIFT] = + idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)] = mk_l4_pgentry((__pa(newpt) | __PAGE_HYPERVISOR | _PAGE_USER) & ~_PAGE_RW); - /* XXX TODO: Copy appropriate L3 entries from RDWR_MPT_VIRT_START XXX */ + /* Copy the L3 mappings from the RDWR_MPT area. */ + p = l4_pgentry_val(idle_pg_table[l4_table_offset(RDWR_MPT_VIRT_START)]); + p &= PAGE_MASK; + p += l3_table_offset(RDWR_MPT_VIRT_START) * sizeof(l3_pgentry_t); + newpt = (void *)((unsigned long)newpt + + (l3_table_offset(RO_MPT_VIRT_START) * + sizeof(l3_pgentry_t))); + memcpy(newpt, __va(p), + (RDWR_MPT_VIRT_END - RDWR_MPT_VIRT_START) >> L3_PAGETABLE_SHIFT); /* Set up linear page table mapping. 
*/ - idle_pg_table[LINEAR_PT_VIRT_START >> L4_PAGETABLE_SHIFT] = + idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)] = mk_l4_pgentry(__pa(idle_pg_table) | __PAGE_HYPERVISOR); } void __init zap_low_mappings(void) { idle_pg_table[0] = mk_l4_pgentry(0); + flush_tlb_all_pge(); } +void subarch_init_memory(struct domain *dom_xen) +{ + unsigned long i, v, m2p_start_mfn; + l3_pgentry_t l3e; + l2_pgentry_t l2e; + + /* M2P table is mappable read-only by privileged domains. */ + for ( v = RDWR_MPT_VIRT_START; + v != RDWR_MPT_VIRT_END; + v += 1 << L2_PAGETABLE_SHIFT ) + { + l3e = l4_pgentry_to_l3(idle_pg_table[l4_table_offset(v)])[ + l3_table_offset(v)]; + if ( !(l3_pgentry_val(l3e) & _PAGE_PRESENT) ) + continue; + l2e = l3_pgentry_to_l2(l3e)[l2_table_offset(v)]; + if ( !(l2_pgentry_val(l2e) & _PAGE_PRESENT) ) + continue; + m2p_start_mfn = l2_pgentry_to_pagenr(l2e); + + for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ ) + { + frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1; + /* gdt to make sure it's only mapped read-only by non-privileged + domains. */ + frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1; + frame_table[m2p_start_mfn+i].u.inuse.domain = dom_xen; + } + } +} /* * Allows shooting down of borrowed page-table use on specific CPUs. @@ -165,19 +215,10 @@ void synchronise_pagetables(unsigned long cpu_mask) long do_stack_switch(unsigned long ss, unsigned long esp) { -#if 0 - int nr = smp_processor_id(); - struct tss_struct *t = &init_tss[nr]; - - /* We need to do this check as we load and use SS on guest's behalf. 
*/ - if ( (ss & 3) == 0 ) + if ( (ss & 3) != 3 ) return -EPERM; - current->thread.guestos_ss = ss; current->thread.guestos_sp = esp; - t->ss1 = ss; - t->esp1 = esp; -#endif return 0; } diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c index c1ed33a03d..67618da2e7 100644 --- a/xen/arch/x86/x86_64/traps.c +++ b/xen/arch/x86/x86_64/traps.c @@ -6,6 +6,7 @@ #include #include #include +#include static int kstack_depth_to_print = 8*20; @@ -97,8 +98,82 @@ void show_registers(struct xen_regs *regs) show_stack((unsigned long *)regs->rsp); } +void show_page_walk(unsigned long addr) +{ + unsigned long page = read_cr3(); + + printk("Pagetable walk from %p:\n", addr); + + page &= PAGE_MASK; + page = ((unsigned long *) __va(page))[l4_table_offset(addr)]; + printk(" L4 = %p\n", page); + if ( !(page & _PAGE_PRESENT) ) + return; + + page &= PAGE_MASK; + page = ((unsigned long *) __va(page))[l3_table_offset(addr)]; + printk(" L3 = %p\n", page); + if ( !(page & _PAGE_PRESENT) ) + return; + + page &= PAGE_MASK; + page = ((unsigned long *) __va(page))[l2_table_offset(addr)]; + printk(" L2 = %p %s\n", page, (page & _PAGE_PSE) ? "(2MB)" : ""); + if ( !(page & _PAGE_PRESENT) || (page & _PAGE_PSE) ) + return; + + page &= PAGE_MASK; + page = ((unsigned long *) __va(page))[l1_table_offset(addr)]; + printk(" L1 = %p\n", page); +} + +#define DOUBLEFAULT_STACK_SIZE 1024 +static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE]; +asmlinkage void double_fault(void); + +asmlinkage void do_double_fault(struct xen_regs *regs) +{ + /* Disable the NMI watchdog. It's useless now. */ + watchdog_on = 0; + + /* Find information saved during fault and dump it to the console. 
*/ + printk("************************************\n"); + printk("EIP: %04lx:[<%p>] \nEFLAGS: %p\n", + 0xffff & regs->cs, regs->rip, regs->eflags); + printk("rax: %p rbx: %p rcx: %p rdx: %p\n", + regs->rax, regs->rbx, regs->rcx, regs->rdx); + printk("rsi: %p rdi: %p rbp: %p rsp: %p\n", + regs->rsi, regs->rdi, regs->rbp, regs->rsp); + printk("r8: %p r9: %p r10: %p r11: %p\n", + regs->r8, regs->r9, regs->r10, regs->r11); + printk("r12: %p r13: %p r14: %p r15: %p\n", + regs->r12, regs->r13, regs->r14, regs->r15); + printk("************************************\n"); + printk("CPU%d DOUBLE FAULT -- system shutdown\n", + logical_smp_processor_id()); + printk("System needs manual reset.\n"); + printk("************************************\n"); + + /* Lock up the console to prevent spurious output from other CPUs. */ + console_force_lock(); + + /* Wait for manual reset. */ + for ( ; ; ) + __asm__ __volatile__ ( "hlt" ); +} + void __init doublefault_init(void) { + int i; + + /* Initialise IST1 for each CPU. Note the handler is non-reentrant. */ + for ( i = 0; i < NR_CPUS; i++ ) + init_tss[i].ist[0] = (unsigned long) + &doublefault_stack[DOUBLEFAULT_STACK_SIZE]; + + /* Set interrupt gate for double faults, specifying IST1. */ + set_intr_gate(TRAP_double_fault, &double_fault); + idt_table[TRAP_double_fault].a |= 1UL << 32; /* IST1 */ } void *decode_reg(struct xen_regs *regs, u8 b) diff --git a/xen/include/asm-x86/desc.h b/xen/include/asm-x86/desc.h index dc91d74f2b..53f4f91e1f 100644 --- a/xen/include/asm-x86/desc.h +++ b/xen/include/asm-x86/desc.h @@ -18,6 +18,7 @@ (((_s)>>3) > LAST_RESERVED_GDT_ENTRY) || \ ((_s)&4)) && \ (((_s)&3) == 1)) +#define VALID_CODESEL(_s) ((_s) == FLAT_GUESTOS_CS || VALID_SEL(_s)) /* These are bitmasks for the high 32 bits of a descriptor table entry. 
*/ #define _SEGMENT_TYPE (15<< 8) @@ -42,8 +43,6 @@ struct desc_struct { #define __TSS(n) (((n)<<2) + __FIRST_TSS_ENTRY) #define __LDT(n) (((n)<<2) + __FIRST_LDT_ENTRY) -#define VALID_CODESEL(_s) ((_s) == FLAT_RING3_CS64 || VALID_SEL(_s)) - typedef struct { u64 a, b; } idt_entry_t; @@ -83,8 +82,6 @@ do { \ #define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY) #define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY) -#define VALID_CODESEL(_s) ((_s) == FLAT_RING1_CS || VALID_SEL(_s)) - typedef struct desc_struct idt_entry_t; #define _set_gate(gate_addr,type,dpl,addr) \ diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h index 18c86e6245..9bea940db3 100644 --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -222,10 +222,7 @@ void synchronise_pagetables(unsigned long cpu_mask); */ #undef phys_to_machine_mapping -/* Don't call virt_to_phys on this: it isn't direct mapped. Using - m2p_start_mfn instead. */ #define machine_to_phys_mapping ((unsigned long *)RDWR_MPT_VIRT_START) -extern unsigned long m2p_start_mfn; #define phys_to_machine_mapping ((unsigned long *)PERDOMAIN_VIRT_START) #define set_machinetophys(_mfn, _pfn) machine_to_phys_mapping[(_mfn)] = (_pfn) diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h index fac0d8bc7f..f566be7809 100644 --- a/xen/include/asm-x86/processor.h +++ b/xen/include/asm-x86/processor.h @@ -428,7 +428,7 @@ struct thread_struct { u8 *io_bitmap; /* Pointer to task's IO bitmap or NULL */ /* Trap info. 
*/ -#ifdef __i386__ +#ifdef ARCH_HAS_FAST_TRAP int fast_trap_idx; struct desc_struct fast_trap_desc; #endif @@ -442,7 +442,7 @@ struct thread_struct { extern idt_entry_t idt_table[]; extern idt_entry_t *idt_tables[]; -#if defined(__i386__) +#ifdef ARCH_HAS_FAST_TRAP #define SET_DEFAULT_FAST_TRAP(_p) \ (_p)->fast_trap_idx = 0x20; \ @@ -466,6 +466,13 @@ extern idt_entry_t *idt_tables[]; long set_fast_trap(struct exec_domain *p, int idx); +#else + +#define SET_DEFAULT_FAST_TRAP(_p) ((void)0) +#define CLEAR_FAST_TRAP(_p) ((void)0) +#define SET_FAST_TRAP(_p) ((void)0) +#define set_fast_trap(_p, _i) (0) + #endif #define INIT_THREAD { 0 } @@ -636,6 +643,7 @@ void show_guest_stack(); void show_trace(unsigned long *esp); void show_stack(unsigned long *esp); void show_registers(struct xen_regs *regs); +void show_page_walk(unsigned long addr); asmlinkage void fatal_trap(int trapnr, struct xen_regs *regs); #endif /* !__ASSEMBLY__ */ diff --git a/xen/include/asm-x86/x86_64/regs.h b/xen/include/asm-x86/x86_64/regs.h index 42b8b3c0b8..0169109fe3 100644 --- a/xen/include/asm-x86/x86_64/regs.h +++ b/xen/include/asm-x86/x86_64/regs.h @@ -9,8 +9,8 @@ struct xen_regs u64 r14; u64 r13; u64 r12; - u64 rbp; - u64 rbx; + union { u64 rbp; u32 ebp; } __attribute__ ((packed)); + union { u64 rbx; u32 ebx; } __attribute__ ((packed)); /* NB. Above here is C callee-saves. 
*/ u64 r11; u64 r10; @@ -25,8 +25,8 @@ struct xen_regs u32 entry_vector; union { u64 rip; u64 eip; } __attribute__ ((packed)); u64 cs; - u64 eflags; - u64 rsp; + union { u64 rflags; u64 eflags; } __attribute__ ((packed)); + union { u64 rsp; u64 esp; } __attribute__ ((packed)); u64 ss; } __attribute__ ((packed)); diff --git a/xen/include/public/arch-x86_32.h b/xen/include/public/arch-x86_32.h index 639ce6daea..39e770b894 100644 --- a/xen/include/public/arch-x86_32.h +++ b/xen/include/public/arch-x86_32.h @@ -137,9 +137,8 @@ typedef struct { } PACKED full_execution_context_t; typedef struct { - u64 mfn_to_pfn_start; /* MFN of start of m2p table */ - u64 pfn_to_mfn_frame_list; /* MFN of a table of MFNs that - make up p2m table */ + /* MFN of a table of MFNs that make up p2m table */ + u64 pfn_to_mfn_frame_list; } PACKED arch_shared_info_t; #define ARCH_HAS_FAST_TRAP diff --git a/xen/include/public/arch-x86_64.h b/xen/include/public/arch-x86_64.h index 60ca186fb6..6db6faa5c0 100644 --- a/xen/include/public/arch-x86_64.h +++ b/xen/include/public/arch-x86_64.h @@ -44,15 +44,22 @@ #define FLAT_RING3_CS32 0x0823 /* GDT index 260 */ #define FLAT_RING3_CS64 0x082b /* GDT index 261 */ -#define FLAT_RING3_DS 0x0833 /* GDT index 262 */ +#define FLAT_RING3_DS32 0x0833 /* GDT index 262 */ +#define FLAT_RING3_DS64 0x0000 -#define FLAT_GUESTOS_DS FLAT_RING3_DS -#define FLAT_GUESTOS_CS FLAT_RING3_CS64 +#define FLAT_GUESTOS_DS64 FLAT_RING3_DS64 +#define FLAT_GUESTOS_DS32 FLAT_RING3_DS32 +#define FLAT_GUESTOS_DS FLAT_GUESTOS_DS64 +#define FLAT_GUESTOS_CS64 FLAT_RING3_CS64 #define FLAT_GUESTOS_CS32 FLAT_RING3_CS32 +#define FLAT_GUESTOS_CS FLAT_GUESTOS_CS64 -#define FLAT_USER_DS FLAT_RING3_DS -#define FLAT_USER_CS FLAT_RING3_CS64 -#define FLAT_USER_CS32 FLAT_RING3_CS32 +#define FLAT_USER_DS64 FLAT_RING3_DS64 +#define FLAT_USER_DS32 FLAT_RING3_DS32 +#define FLAT_USER_DS FLAT_USER_DS64 +#define FLAT_USER_CS64 FLAT_RING3_CS64 +#define FLAT_USER_CS32 FLAT_RING3_CS32 +#define FLAT_USER_CS 
FLAT_USER_CS64 /* And the trap vector is... */ #define TRAP_INSTR "syscall" @@ -89,22 +96,27 @@ typedef struct unsigned long r14; unsigned long r13; unsigned long r12; - unsigned long rbp; - unsigned long rbx; + union { unsigned long rbp, ebp; } PACKED; + union { unsigned long rbx, ebx; } PACKED; unsigned long r11; unsigned long r10; unsigned long r9; unsigned long r8; - unsigned long rax; - unsigned long rcx; - unsigned long rdx; - unsigned long rsi; - unsigned long rdi; - unsigned long rip; + union { unsigned long rax, eax; } PACKED; + union { unsigned long rcx, ecx; } PACKED; + union { unsigned long rdx, edx; } PACKED; + union { unsigned long rsi, esi; } PACKED; + union { unsigned long rdi, edi; } PACKED; + unsigned long _unused; + union { unsigned long rip, eip; } PACKED; unsigned long cs; - unsigned long eflags; - unsigned long rsp; + union { unsigned long rflags, eflags; } PACKED; + union { unsigned long rsp, esp; } PACKED; unsigned long ss; + unsigned long es; + unsigned long ds; + unsigned long fs; + unsigned long gs; } PACKED execution_context_t; typedef u64 tsc_timestamp_t; /* RDTSC timestamp */ @@ -132,9 +144,8 @@ typedef struct { } PACKED full_execution_context_t; typedef struct { - u64 mfn_to_pfn_start; /* MFN of start of m2p table */ - u64 pfn_to_mfn_frame_list; /* MFN of a table of MFNs that - make up p2m table */ + /* MFN of a table of MFNs that make up p2m table */ + u64 pfn_to_mfn_frame_list; } PACKED arch_shared_info_t; #endif /* !__ASSEMBLY__ */ -- 2.30.2